#!/bin/sh
# Profile the cache behaviour of ./test8x8 in two ways:
#   1. a Cachegrind simulation with an explicit cache geometry, followed by
#      an annotated report from cg_annotate;
#   2. hardware counter statistics collected with `perf stat`.
# Must be run from the directory that contains the test8x8 binary.

# Discard profiles from earlier runs so only the fresh one gets annotated.
rm -rf -- cachegrind.out.*

# Simulated caches, each given as <size in bytes>,<associativity>,<line size>:
#   I1 / D1: 65536 bytes, 4-way, 64-byte lines
#   LL:      262144 bytes, 8-way, 64-byte lines
valgrind --tool=cachegrind \
  --I1=65536,4,64 \
  --D1=65536,4,64 \
  --LL=262144,8,64 \
  ./test8x8

# Annotate the profile written to the current directory. The search is limited
# to the same scope the cleanup above covers, so stale profiles lying in
# subdirectories are not mixed in. `-exec ... {} +` copes with unusual file
# names and does not invoke cg_annotate at all when no profile was produced.
find . -maxdepth 1 -type f -name 'cachegrind.out.*' \
  -exec cg_annotate --auto=yes {} +

# Hardware counters for a native (non-simulated) run of the same binary.
perf stat -e cache-references,cache-misses,L1-dcache-load-misses,dTLB-load-misses ./test8x8